In [ ]:
import os
from os import path
from astropy.io import fits
import astropy.units as u
from astropy.table import Table
from astropy.constants import G

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import h5py
from sqlalchemy import func
from scipy.optimize import root

from twoface.db import (db_connect, AllStar, AllVisit, AllVisitToAllStar, 
                        StarResult, Status, JokerRun, initialize_db)

from thejoker import JokerParams, TheJoker, JokerSamples
from twoface.sample_prior import make_prior_cache
from twoface.io import load_samples
from twoface.plot import plot_data_orbits

from scipy.special import logsumexp

In [ ]:
# Here we intentionally use the old run:
TWOFACE_CACHE_PATH = '../../cache/run2/'
figures_path = '../../paper/1-catalog/figures/'

In [ ]:
with h5py.File(path.join(TWOFACE_CACHE_PATH, 'apogee-jitter.hdf5'), 'r') as f:
    print(len(f.keys()))

In [ ]:
Session, _ = db_connect(path.join(TWOFACE_CACHE_PATH, 'apogee.sqlite'))
session = Session()

In [ ]:
# load the run parameters:
run = session.query(JokerRun).filter(JokerRun.name == 'apogee-jitter').one()
params = run.get_joker_params()

# load the posterior samples:
samples_file = path.join(TWOFACE_CACHE_PATH, '{0}.hdf5'.format(run.name))

In [ ]:
def ln_normal(x, mu, std):
    """Log-density of a normal distribution with mean `mu` and standard deviation `std`."""
    return -0.5*( ((x-mu) / std)**2 + np.log(2*np.pi*std**2))

def ln_normal_mixture(x, amp, mu, std):
    """Log-density of a mixture of normals with amplitudes `amp`, means `mu`, dispersions `std`."""
    n_components = len(amp)
    
    lls = []
    for j in range(n_components):
        lls.append(ln_normal(x, mu[j], std[j]) + np.log(amp[j]))
    
    return logsumexp(lls, axis=0)
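
As a quick sanity check, both log-densities defined above should integrate to ≈ 1 over a sufficiently wide grid (arbitrary parameter values used here for illustration):


In [ ]:
_grid = np.linspace(-50, 50, 100000)
print(np.trapz(np.exp(ln_normal(_grid, 0., 2.)), _grid))
print(np.trapz(np.exp(ln_normal_mixture(_grid, [0.3, 0.7], [0., 5.], [1., 2.])), _grid))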

Load data by getting particular stars:


In [ ]:
# The aspcapflag bitmask removes STAR_WARN
# The starflag bitmask removes SUSPECT_BROAD_LINES 
# The logg cut removes TRGB stars - too much intrinsic jitter
stars = session.query(AllStar).join(StarResult, JokerRun, Status, AllVisitToAllStar, AllVisit)\
                              .filter(Status.id > 0)\
                              .filter(JokerRun.name == 'apogee-jitter')\
                              .filter(AllStar.aspcapflag.op('&')(2**np.array([7, 23])) == 0)\
                              .filter(AllStar.starflag.op('&')(2**np.array([17])) == 0)\
                              .filter(AllStar.logg > 2)\
                              .group_by(AllStar.apstar_id)\
                              .having(func.count(AllVisit.id) >= 10)\
                              .all()
print(len(stars))

In [ ]:
K_n = []
apogee_ids = []
with h5py.File(samples_file, 'r') as f:
    # Values that aren't filled get set to nan
    N = len(stars)
    y_nk = np.full((N, 256), np.nan)
    ln_p0 = np.full((N, 256), np.nan)
    
    for n, key in enumerate([s.apogee_id for s in stars]):
        K = len(f[key]['jitter'])
        
        s = f[key]['jitter'][:] * 1000. # km/s to m/s
        y = np.log(s**2)
        y_nk[n, :K] = y
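        # params.jitter is assumed here to be the (mean, std) of the Gaussian prior on y used in this run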
        ln_p0[n, :K] = ln_normal(y, *params.jitter)
        
        K_n.append(K)
        apogee_ids.append(key)

K_n = np.array(K_n)
apogee_ids = np.array(apogee_ids)

# for nulling out the probability for non-existent samples
mask = np.zeros_like(y_nk)
mask[np.isnan(y_nk)] = -np.inf

In [ ]:
plt.hist(K_n)
plt.yscale('log')
plt.xlabel('$K_n$')

Hierarchical inference of jitter parameter distribution
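
The per-star marginal likelihood is estimated below by importance sampling: each star's posterior samples in $y = \ln\left[(s / 1\,\mathrm{m\,s^{-1}})^2\right]$ are reweighted by the ratio of the trial density $p(y \mid \alpha)$ to the prior $p_0(y)$ used in the original run (this is what the `ln_likelihood` method of the `Model` class below computes):

$$
\ln p(D_n \mid \alpha) \approx \ln\left[\frac{1}{K_n} \sum_{k=1}^{K_n} \frac{p(y_{nk} \mid \alpha)}{p_0(y_{nk})}\right]
= \mathrm{logsumexp}_k\left[\ln p(y_{nk} \mid \alpha) - \ln p_0(y_{nk})\right] - \ln K_n
$$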


In [ ]:
x = np.random.random(size=1000)
%timeit ln_normal_mixture(x, [0.2, 0.8], [1, 10], [1, 5])
%timeit ln_normal(x, 0.2, 0.8)

In [ ]:
class Model:
    
    def __init__(self, y_nk, K_n, ln_p0, n_components=1):
        self.y = y_nk
        self.K = K_n
        self.ln_p0 = ln_p0
        self.n_components = int(n_components)
        
        self.ln_norm_func = ln_normal
        if self.n_components > 1:
            self.ln_norm_func = ln_normal_mixture

    def ln_likelihood(self, **kwargs):
        """ Original, single Gaussian implementation """
        delta_ln_prior = self.ln_norm_func(self.y, **kwargs) - self.ln_p0
        delta_ln_prior[np.isnan(delta_ln_prior)] = -np.inf
        return logsumexp(delta_ln_prior, axis=1) - np.log(self.K)
    
    def ln_prior(self, **kwargs):
        lp = 0.
        
        amp = kwargs.get('amp', None)
        if amp is not None:
            amp = np.array(amp)
            if amp.sum() > 1:
                return -np.inf
            
            if np.any(amp < 0):
                return -np.inf
        
        # enforce ordering of the means
        if not np.allclose(np.argsort(kwargs['mu']), np.arange(self.n_components)):
            return -np.inf
        
        # 1/sigma prior
        lp += -np.sum(np.log(kwargs['std'])) 
        
        return lp
    
    def unpack_pars(self, pars):
        # TODO:
        if self.n_components == 1:
            mu, std = pars
            return dict(mu=mu, std=std)
            
        else:
            amp = np.concatenate((pars[:self.n_components-1], [1-sum(pars[:self.n_components-1])]))
            mu = pars[self.n_components-1:2*self.n_components-1]
            std = pars[2*self.n_components-1:]
            return dict(amp=amp, mu=mu, std=std)
    
    def pack_pars(self, mu, std, amp=None):
        pass

    def ln_prob(self, pars_vec):
        pars_kw = self.unpack_pars(pars_vec)
        
        lp = self.ln_prior(**pars_kw)
        if not np.isfinite(lp):
            return -np.inf

        ll_n = self.ln_likelihood(**pars_kw)
        if not np.all(np.isfinite(ll_n)):
            return -np.inf

        return np.sum(ll_n)
    
    def __call__(self, p):
        return self.ln_prob(p)
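
`pack_pars` is left as a stub above; a minimal sketch of the inverse of `unpack_pars`, assuming the same parameter ordering (the first `n_components - 1` amplitudes, then the means, then the dispersions), might look like:


In [ ]:
# hypothetical inverse of Model.unpack_pars; not used in the fits below
def pack_pars_sketch(mu, std, amp=None, n_components=1):
    if n_components == 1:
        return np.array([mu, std])
    amp = np.atleast_1d(amp)
    return np.concatenate((amp[:n_components - 1], np.atleast_1d(mu), np.atleast_1d(std)))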

In [ ]:
# slc = (slice(0,3),) # single
# slc = np.array([512,777])# + list(range(100))) # the two minmax stars above
slc = (slice(None),) # all
# slc = np.array([225, 139])

mm = Model(y_nk[slc], K_n[slc], ln_p0[slc], n_components=1)
mm([-2, 4.]), mm([2, 4.])

In [ ]:
bins = np.linspace(-5, 18, 55)

_n_sub = y_nk[slc].shape[0]
for _n in range(min(_n_sub, 8)):
    plt.hist(y_nk[slc][_n][np.isfinite(y_nk[slc][_n])], bins=bins, 
             alpha=0.5, label='star {0}'.format(_n))

plt.legend(loc='best')
    
vals = np.linspace(bins.min(), bins.max(), 1000)
# lls = ln_normal_mixture(vals, [0.2, 0.8], [0, 1.], [6., 2.])
# plt.plot(vals, np.exp(lls))

In [ ]:
mm = Model(y_nk[slc], K_n[slc], ln_p0[slc])

# Single-component likelihood
sigma_y = 2.
# sigma_y = np.std(y_nk[slc].ravel())

lls = []
vals = np.linspace(-5, 15, 128)
for val in vals:
    lls.append(mm([val, sigma_y]).sum())
    
fig, axes = plt.subplots(1, 2, figsize=(12,5), sharex=True)

axes[0].plot(vals, lls)
axes[0].set_ylabel(r'$\ln p(\{D_n\}|\alpha)$')
axes[1].plot(vals, np.exp(lls - np.max(lls)))
axes[1].set_ylabel(r'$p(\{D_n\}|\alpha)$')

# axes[1].axvline(np.mean(y_nk[slc].ravel()))

axes[0].set_xlabel(r'$\mu_y$')
axes[1].set_xlabel(r'$\mu_y$')

axes[0].xaxis.set_ticks(np.arange(vals.min(), vals.max()+1, 2))

fig.tight_layout()

In [ ]:
# Mixture model
mmix = Model(y_nk[slc], K_n[slc], ln_p0[slc], 
             n_components=2)

lls = []
vals = np.linspace(-5, 15, 128)
for val in vals:
    lls.append(mmix([0.8, val, 10, 2, 2]))
    
fig, axes = plt.subplots(1, 2, figsize=(12,5), sharex=True)

axes[0].plot(vals, lls)
axes[0].set_ylabel(r'$\ln p(\{D_n\}|\alpha)$')
axes[1].plot(vals, np.exp(lls - np.max(lls)))
axes[1].set_ylabel(r'$p(\{D_n\}|\alpha)$')

# axes[1].axvline(np.mean(y_nk[slc].ravel()))

axes[0].set_xlabel(r'$\mu_y$')
axes[1].set_xlabel(r'$\mu_y$')

axes[0].xaxis.set_ticks(np.arange(vals.min(), vals.max()+1, 2))

fig.tight_layout()


In [ ]:
mmix = Model(y_nk[slc], K_n[slc], ln_p0[slc], 
             n_components=1)

In [ ]:
mu_grid = np.linspace(-10, 20, 27)
# var_grid = np.linspace(0.1, 10, 25)**2
std_grid = np.logspace(-3, 1.5, 25)
mu_grid, std_grid = np.meshgrid(mu_grid, std_grid)

probs = np.array([mm([m, v]) 
                  for (m, v) in zip(mu_grid.ravel(), std_grid.ravel())])

In [ ]:
probs.min(), probs.max()

In [ ]:
mu_grid.ravel()[probs.argmax()], std_grid.ravel()[probs.argmax()]

In [ ]:
plt.figure(figsize=(6,5))

plt.pcolormesh(mu_grid, std_grid,
               probs.reshape(mu_grid.shape),
               cmap='Blues', vmin=-1000, vmax=probs.max())
# plt.pcolormesh(mu_grid, std_grid,
#                np.exp(probs.reshape(mu_grid.shape)),
#                cmap='Blues')

plt.yscale('log')
plt.colorbar()
plt.xlabel(r'$\mu_y$')
plt.ylabel(r'$\sigma_y$')


In [ ]:
from scipy.optimize import minimize

In [ ]:
mmix = Model(y_nk[slc], K_n[slc], ln_p0[slc], 
             n_components=1)

In [ ]:
# p0 = [0.8, 7, 10, 2, 2]
p0 = [10., 2]
mmix(p0)

In [ ]:
res = minimize(lambda *args: -mmix(*args), x0=p0)

In [ ]:
res.x

In [ ]:
y = np.linspace(-10, 20, 256)

min_pars = mmix.unpack_pars(res.x)
ll = mmix.ln_norm_func(y, **min_pars)

fig,axes = plt.subplots(1, 2, figsize=(10,5))

axes[0].plot(y, np.exp(ll), marker='')
axes[0].set_xlim(-10, 20)
axes[0].set_xlabel(r'$y=\ln\left[\left(\frac{s}{1\,{\rm m}\,{\rm s}^{-1}}\right)^2\right]$')

s = np.sqrt(np.exp(y))
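# change of variables: y = ln(s^2), so |dy/ds| = 2/s and p(s) = p(y) * 2/s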
axes[1].plot(s, np.exp(ll) * 2/s, marker='')
axes[1].set_xlim(-0.1, 400)
axes[1].set_xlabel('$s$ [{0:latex_inline}]'.format(u.m/u.s))

fig.suptitle('inferred excess variance parameter distribution', fontsize=22)
fig.tight_layout()
fig.subplots_adjust(top=0.9)

fig.savefig(path.join(figures_path, 'infer-jitter.pdf'))
